In this notebook I'll present my results from a super-resolution exercise. Our goal is to convert a given low-resolution picture into a higher-resolution one.
Some of the learning methods this exercise will introduce are residual blocks, transfer learning, dilated convolution, etc.
The data set we used is from the PASCAL Visual Object Classes Challenge 2007, obtained from Kaggle.
%pip install --user opencv-python
%pip install git https://github.com/titu1994/keras-efficientnets.git
from PIL import Image
import glob
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import *
import numpy as np
from keras.models import Model
from keras.applications.vgg16 import VGG16
import keras.backend as K
import tensorflow as tf
import gc
import cv2
import pandas as pd
from keras.models import model_from_json
from os import listdir
from os.path import join
import os
class LeakyRELU(LeakyReLU):
    """LeakyReLU layer that additionally exposes a ``__name__`` attribute.

    Instances are passed as ``activation=`` arguments and to ``Activation``
    further down in this file; presumably the ``__name__`` attribute lets
    Keras treat the instance like a named activation function (TODO confirm).
    """

    def __init__(self, **kwargs):
        # The name is assigned before the parent constructor runs.
        self.__name__ = "LeakyRELU"
        super().__init__(**kwargs)
Our data consists of 5011 images of different shapes. We are interested in creating a data set of (X, Y) pairs such that the shape of X is (72,72,3), the shape of Y[0] is (144,144,3) and the shape of Y[1] is (288,288,3). To keep our preprocessing convenient and memory-efficient, we'll use a lazy data structure: a generator.
# Load every file of the small sample folder and print the shapes of three of
# them, to show that the raw pictures come in different sizes.
images = [
    cv2.imread('first_20/{}'.format(name))
    for name in listdir('first_20')
]
print(images[0].shape, images[1].shape, images[10].shape)
# Directory holding the training images (read by TrainGen).
train_dir = 'VOCdevkit_v2/VOC2007/JPEGImages/train'
# Directory holding the validation images (read by ValGen).
val_dir = 'VOCdevkit_v2/VOC2007/JPEGImages/val'
# Shape of the low-resolution input; the models below pass the same literal
# (72, 72, 3) to Input() rather than referencing this name.
inp_shape = (72,72,3)
# Our memory-friendly generator
class MyGen(object):
    """Lazily build super-resolution batches from the images of a directory.

    Iterating over an instance reads the files of ``pti`` one by one and
    yields ``(x, [y_144, y_288])`` tuples, where ``x`` stacks the 72x72
    versions of the images and ``y_144`` / ``y_288`` stack the 144x144 and
    288x288 versions of the same images. Pixel values are divided by 255
    before resizing.

    Files that OpenCV cannot decode are skipped. Images left over after the
    last full batch are not yielded. The iteration is finite: it ends once
    the directory listing is exhausted.

    Args:
        pti: path to the directory that contains the images.
        batch_size: number of images per yielded batch (default 32).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """

    def __init__(self, pti, batch_size=32):
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        self._pti = pti
        self._batch_size = batch_size

    def __iter__(self):
        x = []
        y_144 = []
        y_288 = []
        for fname in os.listdir(self._pti):
            im = cv2.imread(os.path.join(self._pti, fname))
            if im is None:
                # cv2.imread returns None for files it cannot read.
                continue
            # NOTE(review): cv2.imread returns channels in BGR order while
            # matplotlib's imshow expects RGB - confirm whether the previews
            # should be converted before display.
            im = im / 255.  # normalizing the picture
            x.append(cv2.resize(im, (72, 72)))
            y_144.append(cv2.resize(im, (144, 144)))
            y_288.append(cv2.resize(im, (288, 288)))
            # Trigger on the buffer length rather than on a running counter,
            # so a skipped file right after a yield can never emit an empty
            # batch.
            if len(x) == self._batch_size:
                yield (np.array(x), [np.array(y_144), np.array(y_288)])
                x = []
                y_144 = []
                y_288 = []
class TrainGen(MyGen):
    """Batch generator bound to the training image directory ``train_dir``."""

    def __init__(self):
        super().__init__(train_dir)
class ValGen(MyGen):
    """Batch generator bound to the validation image directory ``val_dir``."""

    def __init__(self):
        super().__init__(val_dir)
# Divide the sample images by 255, then derive the three resolutions used
# throughout the notebook from the scaled images.
images = [img / 255 for img in images]
im_72 = [cv2.resize(img, (72, 72)) for img in images]
im_144 = [cv2.resize(img, (144, 144)) for img in images]
im_288 = [cv2.resize(img, (288, 288)) for img in images]
# Checking our validation data: take the first validation batch and show a
# few samples at all three resolutions side by side.
val_batch = next(iter(ValGen()))
for some_index in [0, 8, 30]:
    _, ax = plt.subplots(1, 3, figsize=(20, 5))
    ax[0].imshow(val_batch[0][some_index])
    # The network input is 72x72 (the title previously read "77*77").
    ax[0].set_title("72*72")
    ax[1].imshow(val_batch[1][0][some_index])
    ax[1].set_title("144*144")
    ax[2].imshow(val_batch[1][1][some_index])
    ax[2].set_title("288*288")
Our first model will be the simplest. We'll use two convolutional layers, keeping the original horizontal and vertical dimensions. We then upsample by a factor of 2 for the 144*144 output and by a factor of 4 for the 288*288 output.
def train_model(model, epochs=5, steps_per_epoch=10, validation_steps=1):
    """Fit ``model`` on batches from the training and validation generators.

    Fresh ``TrainGen`` / ``ValGen`` iterators are created on every call, so
    each call starts reading again from the beginning of the directories.
    Both generators are finite, so the directories must hold enough images
    for ``epochs * steps_per_epoch`` training batches and
    ``epochs * validation_steps`` validation batches.

    Args:
        model: a compiled Keras model with two outputs (144x144 and 288x288).
        epochs: number of epochs to train for.
        steps_per_epoch: training batches drawn per epoch.
        validation_steps: validation batches drawn per epoch.

    Returns:
        The object returned by ``model.fit_generator`` (its ``history``
        attribute is what ``show_learning_history`` plots).
    """
    return model.fit_generator(
        iter(TrainGen()),
        steps_per_epoch=steps_per_epoch,
        validation_data=iter(ValGen()),
        validation_steps=validation_steps,
        epochs=epochs,
    )
def show_learning_history(history):
    """Plot the training and validation loss curves of a training run.

    Args:
        history: object whose ``history`` attribute is a mapping containing
            the ``'loss'`` and ``'val_loss'`` series.
    """
    _, axis = plt.subplots(1, 1, figsize=(12, 4))
    # Training loss first, validation loss second - the legend relies on
    # this order.
    for series in ('loss', 'val_loss'):
        axis.plot(history.history[series])
    axis.set_title('Model loss')
    axis.set_ylabel('Loss')
    axis.set_xlabel('Epoch')
    axis.legend(['Train', 'Test'], loc='upper left')
    plt.show()
def examine_model(idx, x, y_144, y_288, model):
    """Show one sample next to the model's predictions for it.

    Draws the low-resolution input in its own figure, then a 2x2 grid with
    the ground-truth and predicted images at 144x144 (top row) and 288x288
    (bottom row).

    Args:
        idx: index of the sample inside the batch arrays.
        x: batch of low-resolution inputs.
        y_144: batch of 144x144 ground-truth images.
        y_288: batch of 288x288 ground-truth images.
        model: model whose ``predict`` returns the 144x144 output first and
            the 288x288 output second.
    """
    # Run the network once and reuse both outputs, instead of calling
    # predict separately for each subplot.
    predictions = model.predict(np.expand_dims(x[idx], axis=0))

    f_orig, axarr_orig = plt.subplots(1, 1, figsize=(5, 5))
    axarr_orig.imshow(x[idx])
    axarr_orig.set_title("orig 72*72")

    f, axarr = plt.subplots(2, 2, figsize=(10, 10))
    axarr[0][0].imshow(y_144[idx])
    axarr[0][0].set_title("orig 144*144")
    # [0] selects the single sample of the one-element prediction batch.
    axarr[0][1].imshow(predictions[0][0])
    axarr[0][1].set_title("pred 144*144")
    axarr[1][0].imshow(y_288[idx])
    axarr[1][0].set_title("orig 288*288")
    axarr[1][1].imshow(predictions[1][0])
    axarr[1][1].set_title("pred 288*288")
def create_simple_model():
    """Build the baseline model: two 3x3 convolutions, then plain upsampling.

    Returns:
        A Keras ``Model`` mapping a (72, 72, 3) input to two outputs: a
        2x-upsampled and a 4x-upsampled image, each reduced to 3 channels by
        a 1x1 convolution.
    """
    low_res = Input(shape=(72, 72, 3))
    features = Conv2D(64, 3, padding='same')(low_res)
    features = Conv2D(64, 3, padding='same')(features)
    # Both heads branch off the same feature map.
    up_2x = UpSampling2D(size=(2, 2))(features)
    up_4x = UpSampling2D(size=(4, 4))(features)
    out_144 = Conv2D(3, 1)(up_2x)
    out_288 = Conv2D(3, 1)(up_4x)
    return Model(low_res, [out_144, out_288])
# Build, train and inspect the baseline model.
model = create_simple_model()
model.compile(loss='mse', optimizer='adam')
model.summary()
h = train_model(model)
show_learning_history(h)
# Compare prediction and ground truth on the first validation sample.
examine_model(0, val_batch[0], val_batch[1][0], val_batch[1][1], model)
Our next strategy is going to be residual blocks, in the hope that our model will learn whether a given block of layers improves our results. Our residual block can be visualized like this:

And our whole model looks as follows:

def res_block():
    """Build one residual block as a standalone Keras model.

    The block applies two 3x3, 64-filter convolutions with LeakyRELU
    activations, adds the block input back onto the result and applies a
    final LeakyRELU.

    Returns:
        A ``Model`` taking and producing 64-channel feature maps of any
        spatial size.
    """
    block_in = Input(shape=(None, None, 64))
    y = Conv2D(64, 3, activation=LeakyRELU(alpha=0.2), padding='same')(block_in)
    y = Conv2D(64, 3, activation=LeakyRELU(alpha=0.2), padding='same')(y)
    # Skip connection around the two convolutions.
    y = Add()([y, block_in])
    y = Activation(LeakyRELU(alpha=0.2))(y)
    return Model(block_in, y)
def create_res_block_model(weights=None):
    """Build the residual model: 1x1 convolution, four residual blocks, two heads.

    Args:
        weights: optional path of a weights file to load into the model.

    Returns:
        A Keras ``Model`` mapping a (72, 72, 3) input to a 2x-upsampled and a
        4x-upsampled 3-channel output.
    """
    low_res = Input(shape=(72, 72, 3))
    # Lift the 3 input channels to the 64 channels the residual blocks expect.
    features = Conv2D(64, 1)(low_res)
    for _ in range(4):
        features = res_block()(features)
    up_2x = UpSampling2D(size=(2, 2))(features)
    up_4x = UpSampling2D(size=(4, 4))(features)
    out_144 = Conv2D(3, 1)(up_2x)
    out_288 = Conv2D(3, 1)(up_4x)
    model = Model(low_res, [out_144, out_288])
    if weights is not None:
        model.load_weights(weights)
    return model
# Build, train and inspect the residual-block model.
res_block_model = create_res_block_model()
res_block_model.compile(loss='mse', optimizer='adam')
res_block_model.summary()
h = train_model(res_block_model)
show_learning_history(h)
# Compare predictions and ground truth on the first two validation samples.
for idx in (0, 1):
    examine_model(
        idx=idx,
        x=val_batch[0],
        y_144=val_batch[1][0],
        y_288=val_batch[1][1],
        model=res_block_model,
    )
We are trying this model on the intuition that it may be informative to infer a pixel's value by taking into account not only its adjacent neighbours but also some of its neighbours' neighbours. We will add 3 residual blocks, each combining convolutions with dilation rates of 1, 2 and 4.

def res_dialted_block():
    """Build one dilated residual block as a standalone Keras model.

    Three parallel 3x3 ReLU convolutions with dilation rates 1, 2 and 4 read
    the block input; their outputs are concatenated, reduced back to 32
    channels by a 1x1 convolution, added to the block input and passed
    through a LeakyRELU.

    Returns:
        A ``Model`` taking and producing 32-channel feature maps of any
        spatial size.
    """
    block_in = Input(shape=(None, None, 32))
    branches = [
        Conv2D(32, 3, padding='same', activation='relu', dilation_rate=rate)(block_in)
        for rate in (1, 2, 4)
    ]
    merged = Concatenate()(branches)
    merged = Conv2D(32, 1)(merged)
    # Skip connection around the dilated branches.
    merged = Add()([merged, block_in])
    merged = Activation(LeakyRELU())(merged)
    return Model(block_in, merged)
def create_dilated_model():
    """Build the dilated-convolution model with progressive upsampling.

    Two dilated residual blocks run at 72x72, followed by an upsampling to
    144x144. The 144x144 head is a 1x1 ReLU convolution on that map; the
    288x288 head runs one more dilated residual block on the same map,
    upsamples again and applies its own 1x1 ReLU convolution.

    Returns:
        A Keras ``Model`` mapping a (72, 72, 3) input to the two outputs.
    """
    low_res = Input(shape=(72, 72, 3))
    features = Conv2D(32, 1, padding='same', activation='relu')(low_res)
    features = res_dialted_block()(features)
    features = res_dialted_block()(features)
    features = UpSampling2D()(features)
    out_144 = Conv2D(3, (1, 1), activation='relu')(features)
    # The larger head continues from the shared 144x144 feature map.
    high = res_dialted_block()(features)
    high = UpSampling2D()(high)
    out_288 = Conv2D(3, (1, 1), activation='relu')(high)
    return Model(low_res, [out_144, out_288])
# Build, train and inspect the dilated-convolution model.
model = create_dilated_model()
model.compile(loss='mse', optimizer='adam')
model.summary()
h = train_model(model)
# Keep a dedicated reference; `model` is a shared scratch name.
dilated_model = model
show_learning_history(h)
# Compare predictions and ground truth on the first two validation samples.
for idx in (0, 1):
    examine_model(
        idx=idx,
        x=val_batch[0],
        y_144=val_batch[1][0],
        y_288=val_batch[1][1],
        model=dilated_model,
    )
We can see that this model didn't overfit quickly to the training set, so we'll give it some more epochs.
# Continue training the already-fitted dilated model for another round and
# look at the same two validation samples again.
h = train_model(dilated_model)
show_learning_history(h)
for idx in (0, 1):
    examine_model(
        idx=idx,
        x=val_batch[0],
        y_144=val_batch[1][0],
        y_288=val_batch[1][1],
        model=dilated_model,
    )
We will use VGG16 as a feature extractor for our model.
def create_vgg_model():
    """Build a model that feeds features from a VGG16 layer into the baseline.

    The input is lifted to 64 channels by a 1x1 convolution, passed through
    VGG16's ``block1_conv2`` layer, and the result is concatenated with the
    raw input. Two 3x3 convolutions then process the concatenated tensor
    before the two upsampling heads.

    Returns:
        A Keras ``Model`` mapping a (72, 72, 3) input to a 2x-upsampled and a
        4x-upsampled 3-channel output.
    """
    inp = Input(shape=(72, 72, 3))
    # The reused VGG layer is applied to a 64-channel tensor, hence the 1x1
    # convolution in front of it.
    x = Conv2D(kernel_size=1, filters=64)(inp)
    # VGG16 is instantiated once here; the earlier extra instantiation whose
    # result was thrown away has been removed.
    x = VGG16(include_top=False).get_layer("block1_conv2")(x)
    x = concatenate([inp, x])
    # Convolve the concatenated tensor. Previously this layer was applied to
    # `inp`, which silently discarded the VGG branch and the concatenation.
    x = Conv2D(64, 3, padding='same')(x)
    x = Conv2D(64, 3, padding='same')(x)
    x_144 = UpSampling2D(size=(2, 2))(x)
    x_288 = UpSampling2D(size=(4, 4))(x)
    x_144 = Conv2D(3, 1)(x_144)
    x_288 = Conv2D(3, 1)(x_288)
    model = Model(inp, [x_144, x_288])
    return model
# Build, train and inspect the VGG-assisted model.
pre_trained_model = create_vgg_model()
pre_trained_model.compile(loss='mse', optimizer='adam')
pre_trained_model.summary()
h = train_model(pre_trained_model)
show_learning_history(h)
# Compare predictions and ground truth on the first two validation samples.
for idx in (0, 1):
    examine_model(
        idx=idx,
        x=val_batch[0],
        y_144=val_batch[1][0],
        y_288=val_batch[1][1],
        model=pre_trained_model,
    )
# Notebook-style inspection of one 144x144 target's shape; the value is not
# used when this runs as a plain script.
val_batch[1][0][1].shape
# One more training round, then persist the weights.
train_model(pre_trained_model)
pre_trained_model.save_weights(join("models", "vgg.h5"))
In this approach we're trying to use the number of feature maps in one of our layers, hoping that it will translate well into the spatial dimensions.
def create_d2pace_model():
    """Build the depth-to-space model.

    A 1x1 convolution and eight residual blocks produce a 64-channel feature
    map at 72x72. ``tf.depth_to_space`` with block size 2 then trades
    channels for resolution twice (64 -> 16 -> 4 channels), giving the
    144x144 and 288x288 feature maps, and each one is mapped to 3 channels
    by a 1x1 convolution.

    Returns:
        A Keras ``Model`` mapping a (72, 72, 3) input to the two outputs.
    """
    def shuffle_up(t):
        # Move groups of channels into 2x2 spatial blocks.
        return tf.depth_to_space(input=t, block_size=2)

    low_res = Input(shape=(72, 72, 3))
    features = Conv2D(64, 1, activation=LeakyRELU(alpha=0.2))(low_res)
    features = Activation(LeakyRELU(alpha=0.2))(features)
    for _ in range(8):
        features = res_block()(features)
    features = Activation(LeakyRELU(alpha=0.2))(features)

    feat_144 = Lambda(shuffle_up)(features)
    feat_144 = Activation(LeakyRELU(alpha=0.2))(feat_144)
    # The 288x288 branch is derived from the activated 144x144 feature map.
    feat_288 = Lambda(shuffle_up)(feat_144)
    feat_288 = Activation(LeakyRELU(alpha=0.2))(feat_288)

    out_144 = Conv2D(3, 1, activation=LeakyRELU(alpha=0.2))(feat_144)
    out_288 = Conv2D(3, 1, activation=LeakyRELU(alpha=0.2))(feat_288)
    return Model(low_res, [out_144, out_288])
# Build, train and inspect the depth-to-space model.
d2space_model = create_d2pace_model()
d2space_model.compile(optimizer='adam', loss='mse')
d2space_model.summary()
h = train_model(d2space_model)
show_learning_history(h)
# Compare predictions and ground truth on the first two validation samples.
for idx in (0, 1):
    examine_model(
        idx=idx,
        x=val_batch[0],
        y_144=val_batch[1][0],
        y_288=val_batch[1][1],
        model=d2space_model,
    )
# Save the weights of the model trained in this cell. Previously the VGG
# model's weights were written to d2space.h5 by mistake.
d2space_model.save_weights(join("models", "d2space.h5"))
In this model we tried to reduce the depth of our net while keeping it complex. We did it by adding residual connections to the layers that precede the upsampling.
def create_more_res_model(weights=None):
    """Build an encoder/decoder-style model with long skip connections.

    The input is lifted to 64 channels, processed by a residual block at
    72x72 and, after max pooling, by another at 36x36. After a second
    pooling the map is upsampled step by step, and the output of the
    residual block at the matching resolution is added back in after each
    upsampling. Two upsampling heads then produce the outputs.

    Args:
        weights: optional path of a weights file to load into the model.

    Returns:
        A Keras ``Model`` mapping a (72, 72, 3) input to a 144x144 and a
        288x288 3-channel output. The model is returned whether or not
        weights were loaded.
    """
    inp = Input(shape=(72, 72, 3))
    x = Conv2D(64, 1)(inp)
    res_1 = res_block()(x)          # 72x72
    x = MaxPool2D()(res_1)
    res_2 = res_block()(x)          # 36x36
    x = MaxPool2D()(res_2)          # 18x18
    x = UpSampling2D()(x)
    x = add([x, res_2])             # skip connection at 36x36
    x = res_block()(x)
    x = UpSampling2D()(x)
    x = add([x, res_1])             # skip connection at 72x72
    x_144 = UpSampling2D()(x)
    x_288 = UpSampling2D(size=(4, 4))(x)
    x_144 = Conv2D(3, 1)(x_144)
    x_288 = Conv2D(3, 1)(x_288)
    model = Model(inp, [x_144, x_288])
    if weights is not None:
        model.load_weights(weights)
    # Always return the model; the weights branch previously fell through
    # and returned None.
    return model
# Build, train and inspect the model with the extra residual connections.
more_res_model = create_more_res_model()
more_res_model.compile(optimizer='adam', loss='mse')
more_res_model.summary()
h = train_model(more_res_model)
show_learning_history(h)
# Compare predictions and ground truth on the first two validation samples.
for idx in (0, 1):
    examine_model(
        idx=idx,
        x=val_batch[0],
        y_144=val_batch[1][0],
        y_288=val_batch[1][1],
        model=more_res_model,
    )
# Save the weights of the model trained in this cell. Previously the VGG
# model's weights were written to more_res.h5 by mistake.
more_res_model.save_weights(join("models", "more_res.h5"))
We tried a few different approaches. It seems that the dilated convolution approach was the best, considering its reasonably good output and low overfitting.